*** This file runs regressions to generate estimates of average effects across different specifications
*** Here we use the CEP policy categories and 2015-2019 as the post period
*** Drop Switcher States. Least-Skilled Treated Group
*** Only specifications with no time-varying covariates.




* ---------------------------------------------------------------------------
* Session housekeeping
* ---------------------------------------------------------------------------
* Close any log left open by a previous run, then start from an empty session.
capture log close
clear all
set trace off
set more off

* NOTE(review): $path is only set in the commented-out line below, and $dtadir /
* $estdir (used later in this file) are not defined in the visible code.
* Presumably a calling do-file or profile sets them -- confirm before running.
* If needed change global path to point to directory where files are stored on your computer
*global path "I:/DataSets5/Duncan/Dropbox/Recent Minimum Wage Changes/2020.12 NBER Update/JOLE Precommittment Replication"
global logdir "$path/Logfiles/Test Bootstrap"

* Open the log for this specification (overwrites any existing log of the same name)
log using "$logdir/boot-ow-mw-cep-lowskill-noswitchers-2015-2019-no-covars.log", replace

* Fix the random-number seed so the bootstrap resampling is reproducible
set seed 123456

* 2) Define bootstrap program for stratified sampling
capture program drop bs_strat
program define bs_strat, rclass
	* bs_strat: statistic program called once per bootstrap replication.
	*
	* Runs four difference-in-differences regressions on the data in memory
	* (own hourly wage and effective minimum wage, each with two sets of
	* absorbed fixed effects) and returns, for each outcome, the average of the
	* treatment-group x post coefficients across the two specifications.
	*
	* Options (all required by -syntax-, none is used inside the program; they
	* only document the configuration of the calling bootstrap):
	*   postmin(real)  categories(string)  sample(string)  switchers(string)
	*
	* Expects in memory: stateid (created by the caller's idcluster() option),
	* statefip, policygroup, hourwage, effectiveminwage, StatIncreaserLarge,
	* StatIncreaserSmall, indexer, post, lowskill, cps, month, year, age, educ,
	* earnwt, perwt.
	*
	* Returns in r():
	*   mean_ow_beta_all, mean_ow_beta_large, mean_ow_beta_small, mean_ow_beta_indexer
	*   mean_mw_beta_all, mean_mw_beta_large, mean_mw_beta_small, mean_mw_beta_indexer
	*
	* Implementation note: coefficients are accumulated in temporary scalars
	* (double precision) rather than being written into full-length float
	* variables and then averaged with -egen-/-summarize-. The averages are the
	* same quantities, without float rounding and without creating 20 extra
	* variables on every replication.

	*------------------------ 1.1 Syntax -------------------------- 

	syntax, postmin(real) categories(string) sample(string) switchers(string)

	
	*------------------------ 1.2 Set Up -------------------------- 
	
	* Get labels for sampled states 
	labmask stateid, values(statefip) decode
	
	tab stateid policygroup	
	
	* Running averages (over specifications) of each group's coefficient
	tempname ow_large ow_small ow_index mw_large mw_small mw_index
	
	*------------------------ 1.3 DD Regression Hourly and Minimum Wages --------------------------  	

	* CPS Regressions Own wage
	* Specification 1: month-by-year and state fixed effects
	reghdfe hourwage i.StatIncreaserLarge##i.post i.StatIncreaserSmall##i.post i.indexer##i.post [aw=earnwt] if lowskill == 1 & cps == 1, absorb(i.month##i.year i.stateid) cluster(stateid) compact
	scalar `ow_large' = _b[1.StatIncreaserLarge#1.post]
	scalar `ow_small' = _b[1.StatIncreaserSmall#1.post]
	scalar `ow_index' = _b[1.indexer#1.post]

	* Specification 5: adds age and education fixed effects
	reghdfe hourwage i.StatIncreaserLarge##i.post i.StatIncreaserSmall##i.post i.indexer##i.post [aw=earnwt] if lowskill == 1 & cps == 1, absorb(i.month##i.year i.stateid i.age i.educ) cluster(stateid) compact
	scalar `ow_large' = (`ow_large' + _b[1.StatIncreaserLarge#1.post]) / 2
	scalar `ow_small' = (`ow_small' + _b[1.StatIncreaserSmall#1.post]) / 2
	scalar `ow_index' = (`ow_index' + _b[1.indexer#1.post]) / 2

	* CPS Regressions Effective Minimum Wage 
	* Specification 1: month-by-year and state fixed effects
	reghdfe effectiveminwage i.StatIncreaserLarge##i.post i.StatIncreaserSmall##i.post i.indexer##i.post [aw=perwt] if lowskill == 1 & cps == 1, absorb(i.month##i.year i.stateid) cluster(stateid) compact
	scalar `mw_large' = _b[1.StatIncreaserLarge#1.post]
	scalar `mw_small' = _b[1.StatIncreaserSmall#1.post]
	scalar `mw_index' = _b[1.indexer#1.post]
	
	* Specification 5: adds age and education fixed effects
	reghdfe effectiveminwage i.StatIncreaserLarge##i.post i.StatIncreaserSmall##i.post i.indexer##i.post [aw=perwt] if lowskill == 1 & cps == 1, absorb(i.month##i.year i.stateid i.age i.educ) cluster(stateid) compact
	scalar `mw_large' = (`mw_large' + _b[1.StatIncreaserLarge#1.post]) / 2
	scalar `mw_small' = (`mw_small' + _b[1.StatIncreaserSmall#1.post]) / 2
	scalar `mw_index' = (`mw_index' + _b[1.indexer#1.post]) / 2
	
	* Return the averages for this bootstrap replication.
	* "all" is the mean of all six coefficients for the outcome, i.e. the mean
	* of the three group averages (each group contributes two coefficients).
	return scalar mean_ow_beta_all     = (`ow_large' + `ow_small' + `ow_index') / 3
	return scalar mean_ow_beta_large   = `ow_large'
	return scalar mean_ow_beta_small   = `ow_small'
	return scalar mean_ow_beta_indexer = `ow_index'
	
	return scalar mean_mw_beta_all     = (`mw_large' + `mw_small' + `mw_index') / 3
	return scalar mean_mw_beta_large   = `mw_large'
	return scalar mean_mw_beta_small   = `mw_small'
	return scalar mean_mw_beta_indexer = `mw_index'
end
				   


*****************************************************************************************************
************					   3. Set up data							            ************
***************************************************************************************************** 


*** Load the basic monthly CPS extract and keep 2011 onward
use "$dtadir/CPS-2019.dta", clear
keep if !(year < 2011)

*** Keep ages 16-64 (drops seniors and under-16s) and drop records with empstat == 0
keep if !(age >= 65 | age < 16)
keep if !(empstat == 0)

*** Construct economic outcomes of interest

* Employment indicator. Codes counted as employed:
*   empstat = 10: "At work"
*   empstat = 12: "employed, not at work last week"
*   empstat =  1: armed forces (assumed employed)
gen employed = inlist(empstat, 1, 10, 12)

**** Education indicators, defined from the educ code
gen dropout     = (educ < 73)
gen highschool  = (educ == 73)
gen somecollege = (educ >= 81 & educ <= 92)
gen collegeplus = (educ >= 111)

* Calendar quarter implied by the survey month
gen quarter = ceil(month/3) if inrange(month, 1, 12)

* Year-month index of the form YYYYMM
gen time = month + (100*year)

*** Merge in HPI data (state x year x quarter); using-only rows are discarded
merge m:1 statefip year quarter using "$dtadir/HPI_2019.dta"
drop if _merge == 2
drop _merge

* Rescale the house price index by 1000
replace HPI = HPI/1000

*** Merge in personal income data (state x year x quarter); using-only rows are discarded
merge m:1 statefip year quarter using "$dtadir/PersonalIncome_2019.dta"
drop if _merge == 2
drop _merge
gen lnPersonalIncome = ln(PersonalIncome)

** creates mid-skill employment rate 
* group == 1 flags the comparison population: high-school graduates aged 22-30
* and dropouts aged 31-64.
gen group = 0 
replace group = 1 if (age <= 30 & age > 21 & highschool == 1) | (age > 30 & age <= 45 & dropout == 1) | (age > 45 & age < 65 & dropout == 1) 

* stateempD: mean employment of the group within state-month (missing outside the group).
* stateempE: spreads that state-month value to every observation in the cell.
egen stateempD = mean(employed) if group == 1, by(year month statefip) 
egen stateempE = max(stateempD), by(year month statefip) 

* Least-skilled sample: dropouts aged 16-25
gen lowskill = 0 
replace lowskill = 1 if inrange(age,16,25) & dropout == 1

gen young = 0 
replace young = 1 if inrange(age,16,21)

gen primeage = 0
replace primeage = 1 if inrange(age,26,54)

* Restrict to the least-skilled sample (done after stateempE, which needs the full sample)
keep if lowskill == 1

*** Generate indicators if receive tips/overtime, paid hourly, or have wage rates imputed
gen tippedorovertime = otpay == 2
gen hourly = paidhour == 2 
gen notimputed = qhourwag == 0 
gen notimputedB = (qhourwag == 0 & qearnwee == 0)

* The next steps do not drop observations; they set hourwage to missing so the
* observation is excluded from hourly wage regressions only.

*** Use only people eligible for the ORG sample for hourly wage regressions
replace hourwage =. if eligorg != 1

*** Use only people who are employed for hourly wage regressions
replace hourwage =. if employed != 1

*** Use only people paid by the hour for hourly wage regressions
replace hourwage =. if hourly != 1

*** Use only people who do not have imputed wage rates for hourly wage regressions
replace hourwage =. if notimputed != 1

* Adjust NIU cases for hourly wages for hourly wage regressions.
* Compare at float precision on both sides: the literal 999.99 is a double, so a
* plain "hourwage == 999.99" never matches when hourwage is stored as float.
replace hourwage =. if float(hourwage) == float(999.99)

* Data-source flags (this file uses CPS data only)
gen acs = 0
gen cps = 1

gen perwt = wtfinl

keep hourwage year month statefip lnPersonalIncome HPI time educ age stateempE lowskill acs cps perwt earnwt

compress

* generate post variable: 0 for 2011-2013, 1 for 2015-2019, missing for 2014
cap drop post 
gen post = 0 if inrange(year,2011,2013) 
replace post = 1 if inrange(year,2015,2019)

* Keep only needed observations to reduce memory (drops 2014, where post is missing)
drop if missing(post)
keep if lowskill == 1


* merge in policy categories 
* keep(master match) discards policy-file rows for states that are not in the
* CPS sample, consistent with the other merges in this file; otherwise such
* rows would remain as empty observations and enter the bootstrap as clusters.
cap drop originaltype-increase5 
merge m:1 statefip using "$dtadir/min_wage_variables_for_ACS_and_CPS_analysis.dta", keep(master match) nogen keepusing(originaltype jan*min) 

cap drop indexer StatIncreaserLarge StatIncreaserSmall statutoryincreasein2014or2015 statutoryincreasein2014to2017 statutoryincreasein2014to2018

gen indexer = 0 
gen StatIncreaserLarge = 0 
gen StatIncreaserSmall = 0 
gen statutoryincreasein2014or2015 = 0
gen statutoryincreasein2014to2018 = 0

* CEP Categories
* Stata treats missing as larger than any number, so every inequality below is
* guarded with !missing(): without the guard a state with no policy data would
* satisfy "> 0" and be classified as a statutory increaser.
* NOTE(review): the "any increase" rule compares jan2016min with jan2013min,
* while the "large" rule compares jan2015min with jan2013min -- confirm that
* the two different end years are intended.
replace indexer = 1 if originaltype == "Indexer" 
replace statutoryincreasein2014or2015 = 1 if (jan2016min - jan2013min) > 0 & !missing(jan2013min, jan2016min) & indexer == 0 
replace StatIncreaserLarge = 1 if indexer == 0 & (jan2015min - jan2013min) >= 1 & !missing(jan2013min, jan2015min, jan2016min) 
replace StatIncreaserSmall = 1 if indexer == 0 & statutoryincreasein2014or2015 == 1 & StatIncreaserLarge == 0 

* Generate January minimum wage variable: each observation gets the January
* minimum wage of its own survey year
gen effectiveminwage =.
forvalues i=2011/2019 {
	replace effectiveminwage = jan`i'min if year == `i'
}


* Drop switcher states
drop if inlist(statefip,4,8,23,29,41,50,53)

* Generate policygroup variable for doing proportional sampling correctly
* 1 = none of the categories below, 2 = indexer, 3 = small statutory increaser,
* 4 = large statutory increaser
gen policygroup = 1
replace policygroup = 2 if indexer == 1
replace policygroup = 3 if StatIncreaserSmall == 1
replace policygroup = 4 if StatIncreaserLarge == 1

* Take ln of hourly and minimum wages
gen ln_hourwage = ln(hourwage)
gen ln_effectiveminwage = ln(effectiveminwage)


compress


* Work inside the output directory: the bootstrap saving() file below is a relative path
cd "$estdir/Bootstrap/no-tvc"

* Time the bootstrap with timer 1
timer clear
timer on 1

* Stratified cluster bootstrap of the averaged CPS coefficients returned by bs_strat:
* all changer states, large increasers, small increasers, and indexers, for own wages (ow)
* and effective minimum wages (mw).
* States (statefip) are the resampling clusters, drawn within policygroup strata;
* idcluster(stateid) creates the cluster identifier that bs_strat uses.

* Policy categories: cep. Sample: lowskill. Switcher states: noswitchers. Post start: 2015 post end: 2019.
bootstrap ///
	ow_beta_allchange=r(mean_ow_beta_all) ///
	ow_beta_large=r(mean_ow_beta_large) ///
	ow_beta_small=r(mean_ow_beta_small) ///
	ow_beta_index=r(mean_ow_beta_indexer) ///
	mw_beta_allchange=r(mean_mw_beta_all) ///
	mw_beta_large=r(mean_mw_beta_large) ///
	mw_beta_small=r(mean_mw_beta_small) ///
	mw_beta_index=r(mean_mw_beta_indexer), ///
	rep(100) cluster(statefip) strata(policygroup) idcluster(stateid) ///
	saving("boot-ow-mw-cep-lowskill-noswitchers-2015-2019", replace): ///
	bs_strat, postmin(2015) categories(cep) sample(lowskill) switchers(noswitchers)


* Display all bootstrap statistics
*estat bootstrap, all

* Save the results table from the bootstrap call on the data as is.
* r(table) is copied into new variables, one per bootstrapped statistic
* (columns), with one matrix row per observation.
matrix res = r(table)
svmat res, names(col)

* Only the first six rows are kept: estimate, bootstrap SE, z, p-value and CI bounds
keep in 1/6
keep ow_beta_allchange-mw_beta_index

* Name each row by the statistic it holds
gen stat = word("beta boot_se z-score pval lo_ci hi_ci", _n)

* Tag the table with the configuration of this run
gen policycat = "cep"
gen sample = "lowskill"
gen switchers = "noswitchers"
gen postmin = 2015
gen postmax = 2019

save "$estdir/Bootstrap/no-tvc/coef-ow-mw-cep-lowskill-noswitchers-2015-2019.dta", replace

* Reload the replication-level file written by bootstrap's saving() option
* (relative path: resolved against the current working directory)
use "boot-ow-mw-cep-lowskill-noswitchers-2015-2019.dta", clear

* Tag every draw with the configuration of this run and number the draws
cap drop policycat sample switchers postmin postmax
gen iteration = _n
gen policycat = "cep"
gen sample = "lowskill"
gen switchers = "noswitchers"
gen postmin = 2015
gen postmax = 2019
order iteration, last

* Variable labels for the bootstrapped statistics: one own-wage and one
* minimum-wage variable per policy group
local lbl_allchange "All Changers"
local lbl_large "Large Increasers"
local lbl_small "Small Increasers"
local lbl_index "Indexers"
foreach grp in allchange large small index {
	label var ow_beta_`grp' "Own Wage Estimate `lbl_`grp''"
	label var mw_beta_`grp' "Min Wage Estimate `lbl_`grp''"
}

* Variable labels for the configuration tags
label var policycat "CEP or New Policy Categories"
label var sample "Least-Skilled or Young Sample"
label var switchers "Switchers or No Switchers"
label var postmin "First Year of Post Period"
label var postmax "Last Year Post Period"
label var iteration "Bootstrap Iteration Number"

save "$estdir/Bootstrap/no-tvc/boot-ow-mw-cep-lowskill-noswitchers-2015-2019.dta", replace

di "Policy categories: cep. Sample: lowskill. Switcher states: noswitchers. Post start: 2015 post end: 2019. Done" 

* Stop and report the timer, then close the log
timer off 1
timer list 1
log close

